import ollama
import time


def ollama_chat_stream(prompt: str, model: str = "deepseek-r1:7b"):
    try:
        # Enable streaming output and tune generation parameters
        response = ollama.generate(
            model=model,
            prompt=prompt,
            stream=True,  # enable streaming so chunks arrive as they are generated
            options={
                "temperature": 0.7,
                "num_predict": 512,
                "top_p": 0.9
            }
        )

        # Process the response chunk by chunk
        for chunk in response:
            text_chunk = chunk.get("response", "")
            # Print character by character for a typewriter effect
            for char in text_chunk:
                print(char, end='', flush=True)  # key: flush immediately so each character appears right away
                time.sleep(0.1)  # throttle output speed (0.1 s per character)
        print()  # final newline

    except Exception as e:
        print(f"\nError: {str(e)}")

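
# A minimal sketch (not part of the original script) of a multi-turn variant:
# ollama.chat accepts a running message list, so the model keeps conversation
# context across turns. The function name and the `history` parameter are
# illustrative assumptions, not existing identifiers in this project.
def ollama_chat_stream_with_history(history: list, model: str = "deepseek-r1:7b"):
    try:
        response = ollama.chat(
            model=model,
            messages=history,  # full conversation so far, e.g. [{"role": "user", "content": "..."}]
            stream=True,
        )
        reply = ""
        for chunk in response:
            text_chunk = chunk["message"]["content"]
            reply += text_chunk
            print(text_chunk, end='', flush=True)
        print()
        # Append the assistant's reply so the next turn sees the full context
        history.append({"role": "assistant", "content": reply})
    except Exception as e:
        print(f"\nError: {e}")
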

# Main program (modified): simple interactive chat loop
if __name__ == '__main__':
    while True:
        user_input = input("You: ")
        if user_input.lower() in ['q', 'exit', 'quit']:
            break

        print("AI: ", end='', flush=True)  # 预输出前缀
        ollama_chat_stream(user_input)